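# Overfitting: adding higher-degree terms to a polynomial regression can make
# the model perform very well on the training data but very poorly on the
# test data. This is called overfitting.
"""
Ways to reduce overfitting:
1. Use fewer high-degree polynomial terms
2. Lower the model complexity
3. Reduce the number of features (e.g. with PCA)
4. Increase the amount of data (more samples)
5. Clean the data
"""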
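# Underfitting: the metrics are poor on both the training data and the test data.
"""
Ways to reduce underfitting:
1. Add higher-degree terms
2. Make the model more complex
3. Add more features
"""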
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_iris
# from 


# Polynomial-regression demo: fit a degree-3 model on the flower data and
# compare the test error with the training error to see how well the model
# generalizes (a test error far above the training error suggests overfitting).
data = pd.read_csv('flower.csv')

# Targets are the two petal measurements; inputs are the two sepal measurements.
y = data[['petal width (cm)', 'petal length (cm)']].to_numpy()
X = data[['sepal width (cm)', 'sepal length (cm)']].to_numpy()

# Hold out 15% of the rows for testing. No random_state is passed, so the
# split (and therefore the printed errors) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

# Expand the 2 input columns into polynomial terms up to degree 3.
# The expansion is fitted on the training rows only and then applied,
# unchanged, to the test rows.
pf = PolynomialFeatures(degree=3)
X_train = pf.fit_transform(X_train)
X_test = pf.transform(X_test)

lr = LinearRegression()
lr.fit(X_train, y_train)

# scikit-learn metrics take their arguments as (y_true, y_pred).
mes = mean_squared_error(y_test, lr.predict(X_test))    # test-set MSE
mse = mean_squared_error(y_train, lr.predict(X_train))  # training-set MSE
# Printed order: test MSE first, then training MSE.
print(mes, mse)
